import jieba
from collections import Counter

# 读取temp.txt文件
# Read the full UTF-8 text of temp.txt; abort the script if it cannot be read.
try:
    with open('temp.txt', 'r', encoding='utf-8') as f:
        text = f.read()
    print("文件读取成功")
except (OSError, UnicodeDecodeError) as e:
    # OSError covers missing file / permission / bad path; UnicodeDecodeError
    # covers a file that is not valid UTF-8. Anything else is a bug and
    # should propagate with a traceback rather than be swallowed.
    print(f"读取文件时出错: {e}")
    # exit() is a site-module convenience and exits with status 0;
    # SystemExit(1) is always available and reports failure to the shell.
    raise SystemExit(1)

# 使用jieba进行分词
# Tokenize the text with jieba (precise-mode segmentation into a list).
try:
    words = jieba.lcut(text)
    print("分词完成")
except Exception as e:
    # jieba failures here are unusual (e.g. a broken dictionary install),
    # so a broad catch at this script boundary is acceptable — but the
    # process must signal failure: exit() would terminate with status 0.
    print(f"分词时出错: {e}")
    raise SystemExit(1)

# 过滤词组，只保留长度大于1的词
# Drop single-character tokens: this removes punctuation and most
# grammatical particles, keeping only meaningful multi-character words.
filtered_words = [w for w in words if len(w) > 1]

# Tally how many times each remaining word occurs.
word_count = Counter(filtered_words)

# Take the 30 most frequent words, already sorted by descending count.
most_common_words = word_count.most_common(30)

# Render the ranked frequency table.
print("\n《红楼梦》高频词汇列表（按词频从高到低）：")
print("=" * 40)
rank = 1
for word, count in most_common_words:
    print(f"{rank:2d}. {word:<12} {count:>4}次")
    rank += 1